SETUP

# RESULTS for report
# 27.03.2021

# Load packages
library(summarytools)
## Registered S3 method overwritten by 'pryr':
##   method      from
##   print.bytes Rcpp
## For best results, restart R session and update pander using devtools:: or remotes::install_github('rapporter/pander')
library(knitr)
library(kableExtra)
library(sjPlot)
library(sjstats)
library(reshape2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:kableExtra':
## 
##     group_rows
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(scales)
library(questionr)
## 
## Attaching package: 'questionr'
## The following object is masked from 'package:sjstats':
## 
##     prop
## The following object is masked from 'package:summarytools':
## 
##     freq
# load utility functions
source("./R/utilities.R")



## Load data and specify inclusion/exclusion
  # read data 2004-2018
  hse_combined = read.csv("./data/HSE_combined/hse_combined.csv")
  
  # recode imd
  hse_combined$imd <- as.factor(hse_combined$imd)
  levels(hse_combined$imd) = c(
    "Least deprived",
    "Less deprived",
    "Median deprived",
    "More deprieved",
    "Most deprived"
    )
  
  # EXCLUDE: < 16 years
  hse_combined <- hse_combined[hse_combined$age5 >= 16,]

DATA SET DESCRIPTION

CLICK TO EXPAND

## Data set description

  
# Data rows: overall
nrow(hse_combined)
## [1] 149596
# Data rows: by year
kbl <- by(hse_combined,hse_combined$year, nrow)
kbl <- do.call(rbind, list(kbl))
kbl <- kable(x = kbl, format = "simple", format.args = list(big.mark = ","))
kbl
2003 2004 2005 2006 2008 2010 2011 2012 2014 2017 2018
14,836 6,704 10,303 14,142 15,098 8,420 8,610 8,290 8,077 7,997 8,178
# MISSING eq5d % by year
kbl <- by(hse_combined$eq5d,hse_combined$year, function(x){
  round( sum(is.na(x))/length(x) , 4) * 100
} )
kbl <- do.call(cbind, list(kbl))
kable(x = kbl, format = "simple",col.names  = c("% missing"))
% missing
2003 7.30
2004 8.80
2005 10.60
2006 8.60
2008 6.52
2010 12.92
2011 12.69
2012 12.01
2014 12.28
2017 10.35
2018 11.42
# NON-MISSING: rows with complete data
round(sum(complete.cases(hse_combined))/nrow(hse_combined),4)*100
## [1] 34.66
# MISSING data %; by variable
kbl <- apply(hse_combined,2,function(x){
  round( sum(is.na(x))/nrow(hse_combined) , 4) * 100
} )
kbl
##        year         age         imd         sex          wt        eq5d 
##       26.03       36.84       26.03       26.03       26.03       33.32 
##          mo          sc          ua          pd          ad       hh_id 
##       32.14       32.38       32.30       32.16       32.42       36.84 
##  dis_infect  dis_cancer    dis_endo   dis_blood  dis_mental    dis_nerv 
##       26.06       26.06       26.06       26.06       26.06       26.06 
##     dis_eye     dis_ear    dis_circ    dis_resp  dis_digest  dis_genito 
##       26.06       26.06       26.06       26.06       26.06       26.06 
##    dis_skin dis_musculo    limitill      cigst1      bmival    topqual3 
##       26.06       26.06       26.07       26.55       37.84       26.33 
##     econact        eqv5     acutill    genhelf2    ghq12scr    age5.bin 
##       37.05       43.99       26.08       26.06       42.95       26.03 
##        age5 
##       26.03
## Exclude cases without eq5d for now
# EXCLUDE: EQ5D missing
hse_combined <- hse_combined[!is.na(hse_combined$eq5d),]

# Data rows: overall
nrow(hse_combined)
## [1] 99758
## Descriptives: age5

# distribution: age5 by IMD
ggplot(hse_combined) +
  geom_histogram(aes(age5), col = "lightgray",fill = "cadetblue") +
  facet_wrap(~imd) +
  theme_minimal()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# distribution: age5 by IMD for age5 >= 85
ggplot(hse_combined[hse_combined$age5>75,]) +
  geom_histogram(aes(age5), col = "lightgray",fill = "cadetblue",binwidth = 1) +
  facet_wrap(~imd) +
  theme_minimal()

## Descriptives: by variable
print(dfSummary(
  hse_combined, 
  plain.ascii = FALSE, 
  style = "grid", 
  graph.magnif = 0.75, 
  valid.col = FALSE, 
  tmp.img.dir = "/tmp"
),
method = 'render')

Data Frame Summary

hse_combined

Dimensions: 99758 x 37
Duplicates: 2
No Variable Stats / Values Freqs (% of Valid) Graph Missing
1 year [integer] Mean (sd) : 2009 (4.8) min < med < max: 2003 < 2008 < 2018 IQR (CV) : 7 (0) 11 distinct values 0 (0.0%)
2 age [integer] Mean (sd) : 49.5 (18.6) min < med < max: 16 < 49 < 100 IQR (CV) : 29 (0.4) 85 distinct values 14413 (14.4%)
3 imd [factor] 1. Least deprived 2. Less deprived 3. Median deprived 4. More deprieved 5. Most deprived
21916(22.0%)
21173(21.2%)
20085(20.1%)
19531(19.6%)
17053(17.1%)
0 (0.0%)
4 sex [character] 1. Female 2. Male
55759(55.9%)
43999(44.1%)
0 (0.0%)
5 wt [numeric] Mean (sd) : 1.8 (3.4) min < med < max: 0 < 0.9 < 39.2 IQR (CV) : 0.4 (1.9) 57436 distinct values 0 (0.0%)
6 eq5d [numeric] Mean (sd) : 0.8 (0.2) min < med < max: -0.6 < 1 < 1 IQR (CV) : 0.2 (0.3) 614 distinct values 0 (0.0%)
7 mo [integer] Mean (sd) : 1.2 (0.5) min < med < max: 1 < 1 < 5 IQR (CV) : 0 (0.4)
1:80421(80.6%)
2:17546(17.6%)
3:1033(1.0%)
4:674(0.7%)
5:84(0.1%)
0 (0.0%)
8 sc [integer] Mean (sd) : 1.1 (0.3) min < med < max: 1 < 1 < 5 IQR (CV) : 0 (0.3)
1:93935(94.2%)
2:4912(4.9%)
3:703(0.7%)
4:155(0.2%)
5:53(0.1%)
0 (0.0%)
9 ua [integer] Mean (sd) : 1.2 (0.5) min < med < max: 1 < 1 < 5 IQR (CV) : 0 (0.4)
1:82251(82.5%)
2:14434(14.5%)
3:2467(2.5%)
4:460(0.5%)
5:146(0.1%)
0 (0.0%)
10 pd [integer] Mean (sd) : 1.4 (0.6) min < med < max: 1 < 1 < 5 IQR (CV) : 1 (0.4)
1:63776(63.9%)
2:29849(29.9%)
3:5278(5.3%)
4:667(0.7%)
5:188(0.2%)
0 (0.0%)
11 ad [integer] Mean (sd) : 1.3 (0.5) min < med < max: 1 < 1 < 5 IQR (CV) : 0 (0.4)
1:78264(78.5%)
2:18120(18.2%)
3:2860(2.9%)
4:346(0.3%)
5:168(0.2%)
0 (0.0%)
12 hh_id [integer] Mean (sd) : 966595.5 (564645.6) min < med < max: 101011 < 1014021 < 2371301 IQR (CV) : 926270 (0.6) 38816 distinct values 14413 (14.4%)
13 dis_infect [integer] Min : 0 Mean : 0 Max : 1
0:99557(99.8%)
1:181(0.2%)
20 (0.0%)
14 dis_cancer [integer] Min : 0 Mean : 0 Max : 1
0:97563(97.8%)
1:2175(2.2%)
20 (0.0%)
15 dis_endo [integer] Min : 0 Mean : 0.1 Max : 1
0:91180(91.4%)
1:8558(8.6%)
20 (0.0%)
16 dis_blood [integer] Min : 0 Mean : 0 Max : 1
0:98830(99.1%)
1:908(0.9%)
20 (0.0%)
17 dis_mental [integer] Min : 0 Mean : 0 Max : 1
0:95152(95.4%)
1:4586(4.6%)
20 (0.0%)
18 dis_nerv [integer] Min : 0 Mean : 0 Max : 1
0:95754(96.0%)
1:3984(4.0%)
20 (0.0%)
19 dis_eye [integer] Min : 0 Mean : 0 Max : 1
0:97336(97.6%)
1:2402(2.4%)
20 (0.0%)
20 dis_ear [integer] Min : 0 Mean : 0 Max : 1
0:97347(97.6%)
1:2391(2.4%)
20 (0.0%)
21 dis_circ [integer] Min : 0 Mean : 0.1 Max : 1
0:86427(86.7%)
1:13311(13.3%)
20 (0.0%)
22 dis_resp [integer] Min : 0 Mean : 0.1 Max : 1
0:90986(91.2%)
1:8752(8.8%)
20 (0.0%)
23 dis_digest [integer] Min : 0 Mean : 0.1 Max : 1
0:94622(94.9%)
1:5116(5.1%)
20 (0.0%)
24 dis_genito [integer] Min : 0 Mean : 0 Max : 1
0:97322(97.6%)
1:2416(2.4%)
20 (0.0%)
25 dis_skin [integer] Min : 0 Mean : 0 Max : 1
0:98133(98.4%)
1:1605(1.6%)
20 (0.0%)
26 dis_musculo [integer] Min : 0 Mean : 0.2 Max : 1
0:80581(80.8%)
1:19157(19.2%)
20 (0.0%)
27 limitill [integer] Mean (sd) : 2.3 (0.8) min < med < max: 1 < 3 < 3 IQR (CV) : 2 (0.4)
1:25261(25.3%)
2:20030(20.1%)
3:54438(54.6%)
29 (0.0%)
28 cigst1 [integer] Mean (sd) : 2.2 (1.2) min < med < max: 1 < 2 < 4 IQR (CV) : 2 (0.6)
1:46985(47.2%)
2:5546(5.6%)
3:26767(26.9%)
4:20275(20.4%)
185 (0.2%)
29 bmival [numeric] Mean (sd) : 27.3 (5.3) min < med < max: 10.6 < 26.6 < 81.8 IQR (CV) : 6.5 (0.2) 67644 distinct values 12389 (12.4%)
30 topqual3 [integer] Mean (sd) : 3.8 (2.2) min < med < max: 1 < 4 < 7 IQR (CV) : 4 (0.6)
1:21670(21.8%)
2:11256(11.3%)
3:14010(14.1%)
4:21873(22.0%)
5:4603(4.6%)
6:2068(2.1%)
7:24152(24.2%)
126 (0.1%)
31 econact [integer] Mean (sd) : 2 (1.2) min < med < max: 1 < 1 < 4 IQR (CV) : 2 (0.6)
1:46156(54.2%)
2:3776(4.4%)
3:22112(25.9%)
4:13191(15.5%)
14523 (14.6%)
32 eqv5 [integer] Mean (sd) : 3.1 (1.4) min < med < max: 1 < 3 < 5 IQR (CV) : 2 (0.5)
1:13830(17.9%)
2:14636(18.9%)
3:15808(20.5%)
4:16520(21.4%)
5:16458(21.3%)
22506 (22.6%)
33 acutill [integer] Mean (sd) : 1.4 (1.1) min < med < max: 1 < 1 < 5 IQR (CV) : 0 (0.8)
1:83354(83.6%)
2:4580(4.6%)
3:2597(2.6%)
4:2813(2.8%)
5:6364(6.4%)
50 (0.1%)
34 genhelf2 [integer] Mean (sd) : 1.3 (0.6) min < med < max: 1 < 1 < 3 IQR (CV) : 1 (0.5)
1:74529(74.7%)
2:18316(18.4%)
3:6893(6.9%)
20 (0.0%)
35 ghq12scr [integer] Mean (sd) : 1.4 (2.6) min < med < max: 0 < 0 < 12 IQR (CV) : 1 (1.9) 13 distinct values 16304 (16.3%)
36 age5.bin [integer] Mean (sd) : 8.5 (3.8) min < med < max: 1 < 8 < 17 IQR (CV) : 5 (0.4) 17 distinct values 0 (0.0%)
37 age5 [integer] Mean (sd) : 47.8 (18.5) min < med < max: 16 < 45 < 90 IQR (CV) : 25 (0.4) 17 distinct values 0 (0.0%)

Generated by summarytools 0.9.8 (R version 4.0.2)
2021-03-28



TABLES 1+2 and FIGURES 1 + 2: Mean EQ-5D scores

# --------------------------------------------------------

## Descriptive stats: regression model
#  > `eq5d = age5 x imd + year`
# age5, imd, and year are treated as factors

imd_lvls <- levels(hse_combined$imd)
uniq_age5 <- unique(hse_combined$age5)
uniq_age5 <- uniq_age5[order(uniq_age5)]
uniq_age5_labels <- c("16-17?","18-29?",paste0(uniq_age5[-c(1,2,17)],"-",uniq_age5[-c(1,2,17)]+4),"90+")


# descriptive regression model - FEMALES
descr_lm_f <- lm(eq5d ~  as.factor(age5) * imd + as.factor(year),hse_combined[hse_combined$sex =="Female",], weights = wt)
# descriptive regression model - ALES
descr_lm_m <- lm(eq5d ~  as.factor(age5) * imd + as.factor(year),hse_combined[hse_combined$sex =="Male",], weights = wt)
# BUT WHICH YEAR SHOULD WE USE TO PREDICT MEANS?

  # SIDENOTE: Year doesnt really matter
  # 2017 and 2018 are signficantly lower - maybe because of 5L?
  year_coef_indices <- grepl("year",names(descr_lm_f$coefficients))
  cbind(
    descr_lm_f$coefficients[year_coef_indices],
    confint(descr_lm_f)[year_coef_indices,]
  )
##                                          2.5 %       97.5 %
## as.factor(year)2004 -0.0044214970 -0.011476447  0.002633453
## as.factor(year)2005 -0.0022866283 -0.013619189  0.009045932
## as.factor(year)2006  0.0013514399 -0.008051916  0.010754795
## as.factor(year)2008 -0.0062498539 -0.015473651  0.002973944
## as.factor(year)2010 -0.0069759007 -0.018092429  0.004140627
## as.factor(year)2011 -0.0323490885 -0.043375510 -0.021322666
## as.factor(year)2012 -0.0041028705 -0.015230639  0.007024898
## as.factor(year)2014  0.0008294763 -0.010373537  0.012032490
## as.factor(year)2017 -0.0246095991 -0.035806140 -0.013413058
## as.factor(year)2018 -0.0306793571 -0.041840440 -0.019518274
# REGRESSION MODEL WITHOUT YEAR AS PREDICTOR
  # descriptive regression model - FEMALES
  descr_lm_f <- lm(eq5d ~  as.factor(age5) * imd ,hse_combined[hse_combined$sex =="Female",], weights = wt)
  # descriptive regression model - MALES
  descr_lm_m <- lm(eq5d ~  as.factor(age5) * imd ,hse_combined[hse_combined$sex =="Male",], weights = wt)
  
# predict means by strata
# QUESTION: WHICH YEAR SHOULD WE USE TO PREDICT MEANS?
# OR BETTER USE EMPIRICAL AGGREGATE MEANS?
pred_df_f <- pred_df_m <- data.frame(age5 = rep(uniq_age5,5),imd = rep(imd_lvls,each = length(uniq_age5)))
pred_df_f_ratio <- pred_df_m_ratio <- data.frame(age5 = rep(uniq_age5,2),imd = rep(imd_lvls[c(1,5)],each = length(uniq_age5)))


# FIGURE 1
eq5d_pred_m <- predict(descr_lm_m, newdata = pred_df_m,interval  ="predict")
## Warning in predict.lm(descr_lm_m, newdata = pred_df_m, interval = "predict"): Assuming constant prediction variance even though model fit is weighted
eq5d_pred_m_plot <- cbind(pred_df_m, eq5d_pred_m)

figure1 <- ggplot(eq5d_pred_m_plot) +
    # geom_ribbon(aes(x=age5, ymin = lwr, ymax = upr, fill = imd), alpha = 0.3) + 
    geom_point(aes(x=age5, y = fit, col = imd)) +
    geom_line(aes(x=age5, y = fit, col = imd)) +
    ylim(c(0,1)) +
    ylab("Mean EQ-5D index") +
    xlab("Age group") +
    scale_x_continuous(name = "Age group", breaks = uniq_age5, labels = uniq_age5_labels) +
    ggtitle("Pooled mean EQ-5D Score by IMD quintile and age - MALE") +
    theme_minimal() +
    theme(
      axis.text.x = element_text(angle = 30),
      legend.position = "top")
figure1

ggsave(plot = figure1, filename = "./outputs/figure1.jpg",height = 5, width = 7)

# TABLE 1
eq5d_pred_m <- formatC(eq5d_pred_m,digits = 2,format = "f")
eq5d_pred_m <- paste0(eq5d_pred_m[,1]," (",eq5d_pred_m[,2],"; ",eq5d_pred_m[,3],")")
pred_df_m$eq5d <- eq5d_pred_m


# the inequality ratio ci are bootstrapped 
ineq_m <- ineq_boot1(hse_combined, "Male",boot_iter = 1000)
## Loading required package: data.table
## 
## Attaching package: 'data.table'
## The following object is masked from 'package:ggplot2':
## 
##     :=
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
## The following objects are masked from 'package:reshape2':
## 
##     dcast, melt
ineq_m_formated <- formatC(as.matrix(ineq_m$plot_df),digits = 2,format = "f")
ineq_m_formated <- paste0(ineq_m_formated[,2]," (",ineq_m_formated[,3],"; ",ineq_m_formated[,4],")")

pred_df_m <- reshape(pred_df_m,direction = "wide",timevar = "imd" ,idvar = "age5")
pred_df_m <- cbind(pred_df_m, "most/least ratio" = ineq_m_formated)
pred_df_m
write.csv(pred_df_m, "./outputs/table1.csv", row.names = F)


# FIGURE 2
eq5d_pred_f <- predict(descr_lm_f, newdata = pred_df_f,interval  ="predict")
## Warning in predict.lm(descr_lm_f, newdata = pred_df_f, interval = "predict"): Assuming constant prediction variance even though model fit is weighted
eq5d_pred_f_plot <- cbind(pred_df_f, eq5d_pred_f)

figure2 <- ggplot(eq5d_pred_f_plot) +
  # geom_ribbon(aes(x=age5, ymin = lwr, ymax = upr, fill = imd), alpha = 0.3) + 
  geom_point(aes(x=age5, y = fit, col = imd)) +
  geom_line(aes(x=age5, y = fit, col = imd)) +
  ylim(c(0,1)) +
  ylab("Mean EQ-5D index") +
  xlab("Age group") +
  scale_x_continuous(name = "Age group", breaks = uniq_age5, labels = uniq_age5_labels) +
  ggtitle("Pooled mean EQ-5D Score by IMD quintile and age - FEMALE") +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 30),
    legend.position = "top")
figure2

ggsave(plot = figure2, filename = "./outputs/figure2.jpg",height = 5, width = 7)

# TABLE 2
eq5d_pred_f <- formatC(eq5d_pred_f,digits = 2,format = "f")
eq5d_pred_f <- paste0(eq5d_pred_f[,1]," (",eq5d_pred_f[,2],"; ",eq5d_pred_f[,3],")")
pred_df_f$eq5d <- eq5d_pred_f

# bootstrap ineq ratio
ineq_f <- ineq_boot1(hse_combined, "Female",boot_iter = 1000)
ineq_f_formated <- formatC(as.matrix(ineq_f$plot_df),digits = 2,format = "f")
ineq_f_formated <- paste0(ineq_f_formated[,2]," (",ineq_f_formated[,3],"; ",ineq_f_formated[,4],")")


pred_df_f <- reshape(pred_df_f,direction = "wide",timevar = "imd" ,idvar = "age5")
pred_df_f <- cbind(pred_df_f, "most/least ratio" = ineq_f_formated)
pred_df_f
write.csv(pred_df_f, "./outputs/table2.csv", row.names = F)

SIDENOTE: Any difference in variance between male and female?

# Yes, females have about 7% higher SD
  eq5d_sd_sex_tot <- aggregate(eq5d ~ sex, hse_combined, sd)
  (eq5d_sd_sex_tot$eq5d[1] / eq5d_sd_sex_tot$eq5d[2] - 1) * 100
## [1] 7.038993
  eq5d_sd_sex <- aggregate(eq5d ~ sex + age5, hse_combined, sd)
  eq5d_sd_sex <- reshape(eq5d_sd_sex,direction = "wide",timevar = "sex" ,idvar = "age5")
  eq5d_sd_sex$female_sd_proz <-  ( (eq5d_sd_sex$eq5d.Female / eq5d_sd_sex$eq5d.Male) - 1) * 100
  eq5d_sd_sex

FIGURE 3 + 4 (variations): Ratio of IMD1/5 mean EQ-5D score

# FIGURE 3 - COMBINED
ineq_m_plot <- reshape2::melt(data.frame(ineq_m$plot_df),id.vars = "age5")
ineq_m_plot$sex = "Male"
ineq_f_plot <- reshape2::melt(data.frame(ineq_f$plot_df),id.vars = "age5")
ineq_f_plot$sex = "Female"
ineq_plot <- rbind(ineq_m_plot,ineq_f_plot)
ineq_plot <- reshape(ineq_plot,direction = "wide",timevar = "variable",idvar = c("age5","sex"))

figure3c <- ineqPlotter(ineq_plot)
figure3c

ggsave(plot = figure3c, filename = "./outputs/figure3c.jpg",height = 5, width = 7)

figure3c_ci <- ineqPlotter(ineq_plot,ci = T)
ggsave(plot = figure3c_ci, filename = "./outputs/figure3c_ci.jpg",height = 5, width = 7)

# FIGURE 3
figure3 <- ineqPlotter(ineq_plot[ineq_plot$sex == "Male",], ci = T, title = "Inequality index of EQ-5D scores by age - Males")
ggsave(plot = figure3, filename = "./outputs/figure3.jpg",height = 5, width = 7)

figure3_tbl <- figTabler(ineq_plot[ineq_plot$sex == "Male",-2])
write.csv(figure3_tbl, "./outputs/figure3_tbl.csv",row.names = F)

# FIGURE 4 
figure4 <- ineqPlotter(ineq_plot[ineq_plot$sex == "Female",], ci = T, title = "Inequality index of EQ-5D scores by age - Females")
ggsave(plot = figure4, filename = "./outputs/figure4.jpg",height = 5, width = 7)

figure4_tbl <- figTabler(ineq_plot[ineq_plot$sex == "Female",-2])
write.csv(figure4_tbl, "./outputs/figure4_tbl.csv",row.names = F)

FIGURES 5 + 6: Absolute difference in IMD1/5 mean EQ-5D score

# FIGURE 5
ineq_plot_abs_m <- ineq_boot1(hse_combined,"Male",boot_iter = 1000, abs = T)
ineq_plot_abs_m$p1

ggsave(plot = ineq_plot_abs_m$p1,"./outputs/figure5.jpg",height = 5, width = 7)

figure5_tbl <- figTabler(ineq_plot_abs_m$plot_df)
figure5_tbl
write.csv(figure5_tbl, "./outputs/figure5_tbl.csv",row.names = F)

# FIGURE 6
ineq_plot_abs_f <- ineq_boot1(hse_combined,"Female",boot_iter = 1000, abs = T)
ineq_plot_abs_f$p1

ggsave(plot = ineq_plot_abs_f$p1,"./outputs/figure6.jpg",height = 5, width = 7)


figure6_tbl <- figTabler(ineq_plot_abs_f$plot_df)
figure6_tbl
write.csv(figure6_tbl, "./outputs/figure6_tbl.csv",row.names = F)

FIGURES 9 + 10: Proportion reporting each level of response for each EQ-5D dimension

# FIGURE 9

# ONLY FOR 3L DIMENSIONS 2003-2014 !
# EXCLUDING 2017 + 2018

# source("./R/utilities.R")

dim_agg1 <- dimGetter(hse_combined[hse_combined$year <= 2014,], 1)
dim_agg2 <- dimGetter(hse_combined[hse_combined$year <= 2014,], 2)
dim_agg3 <- dimGetter(hse_combined[hse_combined$year <= 2014,], 3)

dim_aggs <- merge(dim_agg1,dim_agg2, all.x=T, all.y = T)
dim_aggs <- merge(dim_aggs,dim_agg3, all.x=T, all.y = T)

dim_aggs_long = melt(dim_aggs,id.vars = c("imd","sex","age5","level"))
dim_aggs_long_rel <- dim_aggs_long[grepl("rel",dim_aggs_long$variable),]
# dim_aggs_long_rel$value[is.na(dim_aggs_long_rel$value)] <- 0
dim_aggs_long_rel$dimension = substr(dim_aggs_long_rel$variable,1,2)


dim_aggs_long_rel$dimension = factor(dim_aggs_long_rel$dimension, 
                                     levels=c("mo","sc","ua","pd","ad")
)

dim_aggs_long_rel$dimension <- recode(
  dim_aggs_long_rel$dimension,
  mo = "Mobility",
  sc = "Self-care",
  ua = "Usual activities",
  pd = "Pain/Discomfort",
  ad = "Anxiety/Depression"
)

 


col_labs <- c("No problems","Some problems","Extreme problems")

figure9 <-  ggplot(dim_aggs_long_rel[dim_aggs_long_rel$sex=="Male",], 
                    aes(x=as.factor(age5), y=value,fill=as.factor(level),
                        col=as.factor(level))) + 
  geom_col(position = 'stack',alpha=0.6)   +
  facet_grid(dimension ~ imd)  +
  scale_color_manual(values = c("lightgreen","orange","red"),
                     labels = c(col_labs),name="Level")  +
  scale_fill_manual(values = c("lightgreen","orange","red"),
                    labels = c(col_labs),name="Level")  +
  xlab("Age group") +
  ylab("Proportion") +
  theme_minimal()+
  theme(legend.position = "bottom") +
  ggtitle("Proportion reporting each level of response for each EQ-5D dimension over age - Males") +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45),
    legend.position = "top")
figure9
## Warning: Removed 2663 rows containing missing values (position_stack).

ggsave(plot = figure9, filename = "./outputs/figure9.jpg",height = 8, width = 9)
## Warning: Removed 2663 rows containing missing values (position_stack).
figure10 <-  ggplot(dim_aggs_long_rel[dim_aggs_long_rel$sex=="Female",], 
             aes(x=as.factor(age5), y=value,fill=as.factor(level),
                 col=as.factor(level))) + 
  geom_col(position = 'stack',alpha=0.6)   +
  facet_grid(dimension ~ imd)  +
  scale_color_manual(values = c("lightgreen","orange","red"),
                     labels = c(col_labs),name="Level")  +
  scale_fill_manual(values = c("lightgreen","orange","red"),
                    labels = c(col_labs),name="Level")  +
    xlab("Age group") +
    ylab("Proportion") +
  theme_minimal()+
  theme(legend.position = "bottom") +
    ggtitle("Proportion reporting each level of response for each EQ-5D dimension over age - Females") +
    theme_minimal() +
    theme(
      axis.text.x = element_text(angle = 45),
      legend.position = "top")
figure10
## Warning: Removed 2646 rows containing missing values (position_stack).

ggsave(plot = figure10, filename = "./outputs/figure10.jpg",height = 8, width = 9)
## Warning: Removed 2646 rows containing missing values (position_stack).

FIGURES 11 - 16: IMD1/IMD5 ratio of % reporting problems for each EQ-5D dimension

# ------------------------------------------------------------------------
#### Line graph showing ratio of IMD1/IMD5 for each EQ-5D dimension over age

# FIGURE 11
figure11 <- dimProbPlotter(dim_aggs_long_rel,1,label = "No Problems","Male")
figure11

ggsave(plot = figure11, filename = "./outputs/figure11.jpg",height = 8, width = 9)

# FIGURE 12
figure12 <- dimProbPlotter(dim_aggs_long_rel,1,label = "No Problems","Female")
figure12

ggsave(plot = figure12, filename = "./outputs/figure12.jpg",height = 8, width = 9)

# FIGURE 13
figure13 <- dimProbPlotter(dim_aggs_long_rel,2,label = "Some Problems","Male")
figure13

ggsave(plot = figure13, filename = "./outputs/figure13.jpg",height = 8, width = 9)

# FIGURE 14
figure14 <- dimProbPlotter(dim_aggs_long_rel,2,label = "Some Problems","Female")
figure14

ggsave(plot = figure14, filename = "./outputs/figure14.jpg",height = 8, width = 9)

# FIGURE 15
figure15 <- dimProbPlotter(dim_aggs_long_rel,3,label = "Extreme Problems","Male")
figure15
## Warning: Removed 6 row(s) containing missing values (geom_path).

ggsave(plot = figure15, filename = "./outputs/figure15.jpg",height = 8, width = 9)
## Warning: Removed 6 row(s) containing missing values (geom_path).
# FIGURE 16
figure16 <- dimProbPlotter(dim_aggs_long_rel,3,label = "Extreme Problems","Female")
figure16
## Warning: Removed 6 row(s) containing missing values (geom_path).

ggsave(plot = figure16, filename = "./outputs/figure16.jpg",height = 8, width = 9)
## Warning: Removed 6 row(s) containing missing values (geom_path).



fin.